library(tidyverse)
library(skimr)
library(knitr)
library(kableExtra)
library(rvest)
library(reshape2)
library(gganimate)
library(magick)
SW <- readr::read_csv('https://raw.githubusercontent.com/youmrg/data_R/main/SW.csv')
SW
## # A tibble: 6,201 x 6
## gender event medal athlete abb year
## <chr> <chr> <chr> <chr> <chr> <dbl>
## 1 Men 25 m Freestyle 1A Gold KENNY Mike GBR 1980
## 2 Men 25 m Freestyle 1A Silver KANTOLA Pekka FIN 1980
## 3 Men 25 m Freestyle 1A Bronze TIETZE H. FRG 1980
## 4 Men 25 m Freestyle 1B Gold BURGER M. CAN 1980
## 5 Men 25 m Freestyle 1B Silver SLUPE G. USA 1980
## 6 Men 25 m Freestyle 1B Bronze MAKI Eero FIN 1980
## 7 Men 25 m Freestyle 1C Gold SMYK Zbigniew POL 1980
## 8 Men 25 m Freestyle 1C Silver EMMEL Manfred FRG 1980
## 9 Men 25 m Freestyle 1C Bronze OCKVIRK Robert USA 1980
## 10 Men 50 m Freestyle CP C Gold ADLER Kare NOR 1980
## # ... with 6,191 more rows
counter les medailles pour chaque pays
medal_count<- SW %>% filter(!is.na(medal))%>%
group_by(abb, medal) %>%
summarize(Count=length(medal))
medal_count
## # A tibble: 172 x 3
## # Groups: abb [67]
## abb medal Count
## <chr> <chr> <int>
## 1 ARG Bronze 9
## 2 ARG Gold 5
## 3 ARG Silver 10
## 4 AUS Bronze 160
## 5 AUS Gold 147
## 6 AUS Silver 158
## 7 AUT Bronze 2
## 8 AUT Gold 2
## 9 AUT Silver 4
## 10 AZE Gold 1
## # ... with 162 more rows
ordonner les pays par nombre de medailles
ord_med <- medal_count %>%
group_by(abb) %>%
summarize(Total=sum(Count)) %>%
arrange(Total) %>%
select(abb)
medal_count$abb <- factor(medal_count$abb, levels=ord_med$abb)
ord_med
## # A tibble: 67 x 1
## abb
## <chr>
## 1 BAH
## 2 BUL
## 3 KAZ
## 4 LTU
## 5 MAR
## 6 TTO
## 7 VIE
## 8 IPP
## 9 SLO
## 10 TCH
## # ... with 57 more rows
le plot
ggplot(medal_count, aes(x=abb, y=Count, fill=medal)) +
geom_col() +
coord_flip() +
scale_fill_manual(values=c("gold1","gray70","gold4")) +
ggtitle("les medailles de chaque pays dans l'histoire de la compéttition ") +
theme(plot.title = element_text(hjust = 0.5))
ggplot(SW,aes(x= gender ,fill= medal))+
geom_bar()+
scale_fill_manual(values=c("gold1","gray70","gold4")) +
ggtitle("nombre de medailles par sex ") +
theme(plot.title = element_text(hjust = 0.5))
ggplot(SW,aes(x= gender ,fill= medal))+
facet_wrap(~ year)+
geom_bar()+
scale_fill_manual(values=c("gold1","gray70","gold4")) +
ggtitle("nombre de medailles par sex pour chaque année ") +
theme(plot.title = element_text(hjust = 0.5))
noc <- readr::read_csv('https://raw.githubusercontent.com/youmrg/data_R/main/noc_regions.csv')
noc = noc %>%
rename(abb = NOC)
noc
## # A tibble: 230 x 3
## abb region notes
## <chr> <chr> <chr>
## 1 AFG Afghanistan <NA>
## 2 AHO Curacao Netherlands Antilles
## 3 ALB Albania <NA>
## 4 ALG Algeria <NA>
## 5 AND Andorra <NA>
## 6 ANG Angola <NA>
## 7 ANT Antigua Antigua and Barbuda
## 8 ANZ Australia Australasia
## 9 ARG Argentina <NA>
## 10 ARM Armenia <NA>
## # ... with 220 more rows
Add regions to data and remove missing points
data_regions <- SW %>%
left_join(noc,by="abb") %>%
filter(!is.na(region))
sous ensemble pour les jeux de 1980 et 2016,counter les athletes de chaque pays.
rio <- data_regions %>%
filter(year == "2016") %>%
group_by(region) %>%
summarize(Rio = length(unique(athlete)))
Arnhem_et_Veenendaal<- data_regions %>%
filter(year == "1980") %>%
group_by(region) %>%
summarize(Arnhem = length(unique(athlete)))
Create data for mapping
world <- map_data("world")
mapdat <- tibble(region=unique(world$region))
mapdat <- mapdat %>%
left_join(Arnhem_et_Veenendaal, by="region") %>%
left_join(rio, by="region")
mapdat$Arnhem[is.na(mapdat$Arnhem)] <- 0
mapdat$Rio[is.na(mapdat$Rio)] <- 0
world <- left_join(world, mapdat, by="region")
Plot: Arnhem et Veenendaal 1980
ggplot(world, aes(x = long, y = lat, group = group)) +
geom_polygon(aes(fill = Arnhem)) +
labs(title = "Arnhem et Veenendaal 1980",
x = NULL, y = NULL) +
theme(axis.ticks = element_blank(),
axis.text = element_blank(),
panel.background = element_rect(fill = "navy"),
plot.title = element_text(hjust = 0.5)) +
guides(fill=guide_colourbar(title="Athletes")) +
scale_fill_gradient2(low="white",high = "red")
Plot: Rio 2016
ggplot(world, aes(x = long, y = lat, group = group)) +
geom_polygon(aes(fill = Rio)) +
labs(title = "Rio 2016",
x = NULL, y = NULL) +
theme(axis.ticks = element_blank(),
axis.text = element_blank(),
panel.background = element_rect(fill = "navy"),
plot.title = element_text(hjust = 0.5)) +
guides(fill=guide_colourbar(title="Athletes")) +
scale_fill_gradient2(low="white",high = "red")
data_abb_medal <- dcast(medal_count, abb ~ medal)
data_abb_medal[is.na(data_abb_medal)] <- 0
data_abb_medal
## abb Bronze Gold Silver
## 1 BAH 1 0 0
## 2 BUL 0 0 1
## 3 KAZ 0 1 0
## 4 LTU 0 0 1
## 5 MAR 1 0 0
## 6 TTO 1 0 0
## 7 VIE 0 0 1
## 8 IPP 0 1 1
## 9 SLO 2 0 0
## 10 TCH 1 0 1
## 11 KUW 2 0 1
## 12 LUX 0 1 2
## 13 CRO 4 0 0
## 14 CYP 1 2 1
## 15 HKG 3 1 0
## 16 JAM 0 1 3
## 17 CUB 2 1 2
## 18 PER 2 2 1
## 19 SGP 1 3 1
## 20 ZIM 3 0 2
## 21 AUT 2 2 4
## 22 AZE 0 1 7
## 23 THA 4 1 3
## 24 EGY 6 1 2
## 25 POR 6 0 3
## 26 SVK 3 2 4
## 27 EST 3 2 5
## 28 COL 4 2 5
## 29 SUI 8 0 4
## 30 UZB 6 2 4
## 31 EUN 6 4 3
## 32 FRO 5 1 7
## 33 KOR 6 7 2
## 34 YUG 9 3 6
## 35 URS 9 0 11
## 36 ARG 9 5 10
## 37 IRL 7 9 9
## 38 CZE 14 12 4
## 39 BEL 13 6 12
## 40 FIN 19 5 13
## 41 GRE 11 10 17
## 42 BLR 9 21 14
## 43 ISL 27 14 8
## 44 RSA 13 24 15
## 45 MEX 22 24 12
## 46 NZL 14 30 19
## 47 ITA 25 18 30
## 48 HUN 33 32 23
## 49 JPN 42 35 25
## 50 ISR 41 31 41
## 51 RUS 42 33 42
## 52 NOR 35 54 43
## 53 BRA 47 35 56
## 54 DEN 67 37 40
## 55 FRG 39 63 56
## 56 GER 61 67 76
## 57 UKR 78 78 65
## 58 SWE 54 95 94
## 59 POL 81 91 89
## 60 FRA 93 107 103
## 61 NED 94 117 103
## 62 CHN 107 150 119
## 63 ESP 140 118 122
## 64 CAN 118 165 119
## 65 AUS 160 147 158
## 66 USA 192 241 185
## 67 GBR 212 204 252
no_gold_data <- subset(data_abb_medal, Gold == 0 & Silver>0 & Bronze>0)
print("les pays qui n'ont pas de médaille d'or mais ils ont les autres")
## [1] "les pays qui n'ont pas de médaille d'or mais ils ont les autres"
no_gold_data$abb
## [1] TCH KUW ZIM POR SUI URS
## 67 Levels: BAH BUL KAZ LTU MAR TTO VIE IPP SLO TCH KUW LUX CRO CYP HKG ... GBR
all_medal_sex <- SW%>% group_by(abb, medal, gender) %>%
summarise(total = n())
all_medal_sex.wide <- dcast(all_medal_sex, abb ~ medal+gender)
all_medal_sex.wide[is.na(all_medal_sex.wide)] <- 0
all_medal_sex.wide
## abb Bronze_Men Bronze_Mixed Bronze_Women Gold_Men Gold_Mixed Gold_Women
## 1 ARG 2 0 7 0 0 5
## 2 AUS 63 0 97 72 0 75
## 3 AUT 2 0 0 2 0 0
## 4 AZE 0 0 0 0 0 1
## 5 BAH 0 0 1 0 0 0
## 6 BEL 7 0 6 4 0 2
## 7 BLR 9 0 0 21 0 0
## 8 BRA 39 0 8 31 0 4
## 9 BUL 0 0 0 0 0 0
## 10 CAN 53 0 65 73 0 92
## 11 CHN 74 0 33 105 8 37
## 12 COL 4 0 0 2 0 0
## 13 CRO 4 0 0 0 0 0
## 14 CUB 2 0 0 1 0 0
## 15 CYP 0 0 1 0 0 2
## 16 CZE 8 0 6 5 0 7
## 17 DEN 43 0 24 28 0 9
## 18 EGY 6 0 0 1 0 0
## 19 ESP 80 0 60 69 0 49
## 20 EST 1 0 2 0 0 2
## 21 EUN 4 0 2 4 0 0
## 22 FIN 6 0 13 1 0 4
## 23 FRA 55 0 38 59 0 48
## 24 FRG 32 0 7 36 0 27
## 25 FRO 0 0 5 0 0 1
## 26 GBR 84 0 128 119 0 85
## 27 GER 23 0 38 25 0 42
## 28 GRE 10 0 1 10 0 0
## 29 HKG 2 0 1 1 0 0
## 30 HUN 22 0 11 25 0 7
## 31 IPP 0 0 0 1 0 0
## 32 IRL 6 0 1 6 0 3
## 33 ISL 8 0 19 6 0 8
## 34 ISR 32 0 9 25 0 6
## 35 ITA 21 0 4 14 0 4
## 36 JAM 0 0 0 0 0 1
## 37 JPN 33 0 9 12 0 23
## 38 KAZ 0 0 0 0 0 1
## 39 KOR 6 0 0 7 0 0
## 40 KUW 2 0 0 0 0 0
## 41 LTU 0 0 0 0 0 0
## 42 LUX 0 0 0 1 0 0
## 43 MAR 1 0 0 0 0 0
## 44 MEX 11 0 11 11 0 13
## 45 NED 57 0 37 71 0 46
## 46 NOR 17 0 18 34 0 20
## 47 NZL 9 0 5 11 0 19
## 48 PER 2 0 0 2 0 0
## 49 POL 56 0 25 60 0 31
## 50 POR 3 0 3 0 0 0
## 51 RSA 11 0 2 10 0 14
## 52 RUS 33 0 9 22 0 11
## 53 SGP 0 0 1 0 0 3
## 54 SLO 2 0 0 0 0 0
## 55 SUI 7 0 1 0 0 0
## 56 SVK 2 0 1 2 0 0
## 57 SWE 28 0 26 39 0 56
## 58 TCH 1 0 0 0 0 0
## 59 THA 4 0 0 1 0 0
## 60 TTO 0 0 1 0 0 0
## 61 UKR 52 7 19 62 0 16
## 62 URS 3 0 6 0 0 0
## 63 USA 84 0 108 89 0 152
## 64 UZB 3 0 3 1 0 1
## 65 VIE 0 0 0 0 0 0
## 66 YUG 8 0 1 3 0 0
## 67 ZIM 1 0 2 0 0 0
## Silver_Men Silver_Mixed Silver_Women
## 1 3 0 7
## 2 78 0 80
## 3 4 0 0
## 4 3 0 4
## 5 0 0 0
## 6 7 0 5
## 7 14 0 0
## 8 44 6 6
## 9 0 0 1
## 10 50 0 69
## 11 99 0 20
## 12 5 0 0
## 13 0 0 0
## 14 2 0 0
## 15 0 0 1
## 16 0 0 4
## 17 27 0 13
## 18 2 0 0
## 19 69 0 53
## 20 0 0 5
## 21 2 0 1
## 22 5 0 8
## 23 51 0 52
## 24 41 0 15
## 25 0 0 7
## 26 119 0 133
## 27 25 0 51
## 28 15 0 2
## 29 0 0 0
## 30 15 0 8
## 31 1 0 0
## 32 4 0 5
## 33 4 0 4
## 34 35 0 6
## 35 23 0 7
## 36 0 0 3
## 37 21 0 4
## 38 0 0 0
## 39 2 0 0
## 40 1 0 0
## 41 1 0 0
## 42 2 0 0
## 43 0 0 0
## 44 6 0 6
## 45 57 0 46
## 46 26 0 17
## 47 8 0 11
## 48 1 0 0
## 49 63 0 26
## 50 0 0 3
## 51 13 0 2
## 52 32 0 10
## 53 0 0 1
## 54 0 0 0
## 55 4 0 0
## 56 0 0 4
## 57 44 0 50
## 58 1 0 0
## 59 3 0 0
## 60 0 0 0
## 61 41 0 24
## 62 9 0 2
## 63 62 0 123
## 64 1 0 3
## 65 1 0 0
## 66 6 0 0
## 67 0 0 2
no_women_gold <- subset(all_medal_sex.wide, Gold_Women ==0 & Gold_Men>0 )
no_women_gold
## abb Bronze_Men Bronze_Mixed Bronze_Women Gold_Men Gold_Mixed Gold_Women
## 3 AUT 2 0 0 2 0 0
## 7 BLR 9 0 0 21 0 0
## 12 COL 4 0 0 2 0 0
## 14 CUB 2 0 0 1 0 0
## 18 EGY 6 0 0 1 0 0
## 21 EUN 4 0 2 4 0 0
## 28 GRE 10 0 1 10 0 0
## 29 HKG 2 0 1 1 0 0
## 31 IPP 0 0 0 1 0 0
## 39 KOR 6 0 0 7 0 0
## 42 LUX 0 0 0 1 0 0
## 48 PER 2 0 0 2 0 0
## 56 SVK 2 0 1 2 0 0
## 59 THA 4 0 0 1 0 0
## 66 YUG 8 0 1 3 0 0
## Silver_Men Silver_Mixed Silver_Women
## 3 4 0 0
## 7 14 0 0
## 12 5 0 0
## 14 2 0 0
## 18 2 0 0
## 21 2 0 1
## 28 15 0 2
## 29 0 0 0
## 31 1 0 0
## 39 2 0 0
## 42 2 0 0
## 48 1 0 0
## 56 0 0 4
## 59 3 0 0
## 66 6 0 0
print("countries where women never won gold medal but men has")
## [1] "countries where women never won gold medal but men has"
no_women_gold$abb
## [1] "AUT" "BLR" "COL" "CUB" "EGY" "EUN" "GRE" "HKG" "IPP" "KOR" "LUX" "PER"
## [13] "SVK" "THA" "YUG"
no_men_gold <- subset(all_medal_sex.wide, Gold_Women>0 & Gold_Men==0 )
no_men_gold
## abb Bronze_Men Bronze_Mixed Bronze_Women Gold_Men Gold_Mixed Gold_Women
## 1 ARG 2 0 7 0 0 5
## 4 AZE 0 0 0 0 0 1
## 15 CYP 0 0 1 0 0 2
## 20 EST 1 0 2 0 0 2
## 25 FRO 0 0 5 0 0 1
## 36 JAM 0 0 0 0 0 1
## 38 KAZ 0 0 0 0 0 1
## 53 SGP 0 0 1 0 0 3
## Silver_Men Silver_Mixed Silver_Women
## 1 3 0 7
## 4 3 0 4
## 15 0 0 1
## 20 0 0 5
## 25 0 0 7
## 36 0 0 3
## 38 0 0 0
## 53 0 0 1
print("countries where men never won gold medal but women has")
## [1] "countries where men never won gold medal but women has"
no_men_gold$abb
## [1] "ARG" "AZE" "CYP" "EST" "FRO" "JAM" "KAZ" "SGP"
# medal_continent
continent <-readr::read_csv('https://raw.githubusercontent.com/youmrg/data_R/main/data.csv')
continent = continent %>%
rename(abb = Three_Letter_Country_Code)
medal_continent <- SW %>%
left_join(continent,by="abb") %>%
filter(!is.na(Continent_Name))
medal_continent<- medal_continent %>% filter(!is.na(medal))%>%
group_by(year,Continent_Name) %>%
summarize(Count=length(medal))
medal_continent
## # A tibble: 55 x 3
## # Groups: year [10]
## year Continent_Name Count
## <dbl> <chr> <int>
## 1 1980 Africa 1
## 2 1980 Asia 25
## 3 1980 Europe 238
## 4 1980 North America 119
## 5 1980 Oceania 13
## 6 1980 South America 13
## 7 1984 Africa 1
## 8 1984 Asia 38
## 9 1984 Europe 455
## 10 1984 North America 212
## # ... with 45 more rows
sum_medal_cont <- medal_continent %>%
group_by(Continent_Name) %>%
summarize(nombre_de_medailles=sum(Count))
sum_medal_cont
## # A tibble: 6 x 2
## Continent_Name nombre_de_medailles
## <chr> <int>
## 1 Africa 10
## 2 Asia 765
## 3 Europe 2759
## 4 North America 1088
## 5 Oceania 528
## 6 South America 178
pie_chart<- sum_medal_cont %>%
mutate(perc = `nombre_de_medailles` / sum(`nombre_de_medailles`)) %>%
arrange(perc) %>%
mutate(labels = scales::percent(perc))
pie_chart
## # A tibble: 6 x 4
## Continent_Name nombre_de_medailles perc labels
## <chr> <int> <dbl> <chr>
## 1 Africa 10 0.00188 0.2%
## 2 South America 178 0.0334 3.3%
## 3 Oceania 528 0.0991 9.9%
## 4 Asia 765 0.144 14.4%
## 5 North America 1088 0.204 20.4%
## 6 Europe 2759 0.518 51.8%
ggplot(pie_chart, aes(x = "", y = perc, fill = Continent_Name)) +
geom_col() +
coord_polar(theta = "y")
le graphique animé :
WP3 <- ggplot(data = medal_continent, aes(x = year, y = Count, group=Continent_Name, color=Continent_Name)) +
geom_line() +
geom_point() +
ggtitle("Nombre de médailles entre 1980 et 2016") +
ylab("Nombre de médailles") +
xlab("Année")+
theme_classic()+
view_follow(fixed_x = TRUE,
fixed_y = TRUE) +
transition_reveal(year)
WP3 <- animate(WP3, end_pause = 15)
WP3
WP <- ggplot(data = medal_continent) +
geom_col(mapping = aes(x = Continent_Name, y = Count),
fill = "darkcyan") +
theme_classic() +
xlab("Région") +
ylab("Nombre de téléphones (en milliers)") +
transition_states(year,
transition_length = 2,
state_length = 1,
wrap = TRUE) +
ggtitle("Année : {closest_state}")
WP
## `summarise()` has grouped output by 'event'. You can override using the `.groups` argument.
## `summarise()` has grouped output by 'year', 'abb'. You can override using the `.groups` argument.